#  -*- coding:utf-8 -*- 
"""
@ author: 罗金盛
@ time: 2023/11/16 
@ file: NLTK进行命名实体识别举例.py

"""
import nltk
from nltk import word_tokenize, pos_tag, ne_chunk

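# One-time setup: uncomment and run these once to fetch the NLTK data used below.
# Resource names vary between NLTK releases; if a LookupError is raised, download
# the resource named in its message instead.
# nltk.download('punkt')  # tokenizer models needed by word_tokenize
# nltk.download('averaged_perceptron_tagger')
# nltk.download('maxent_ne_chunker')
# nltk.download('words')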

# Sample English passage to run through the NER pipeline.
text = "FIFA was founded in 1904 to oversee international competition among the national associations of Belgium, Denmark, France, Germany, the Netherlands, Spain, Sweden, and Switzerland. Headquartered in Zürich, its membership now comprises 211 national associations. Member countries must each also be members of one of the six regional confederations into which the world is divided: Africa, Asia, Europe, North & Central America and the Caribbean, Oceania, and South America."

# Step 1 - tokenization: split the passage into word and punctuation tokens.
tokens = nltk.word_tokenize(text)

# Step 2 - part-of-speech tagging: pair every token with its POS tag.
tagged = nltk.pos_tag(tokens)

# Step 3 - named-entity recognition: chunk the tagged tokens into a tree
# whose subtrees mark the detected entities.
entities = nltk.ne_chunk(tagged)

# Show the resulting chunk tree on stdout.
print(entities)
